# CNPE base theme: a blank (void) canvas set in Poppins, with centered
# title/subtitle and explicit sizes for legend and caption text.
cnpe_theme <- theme_void(base_family = "poppins") +
  theme(
    plot.title    = element_text(hjust = 0.5, size = 72),
    plot.subtitle = element_text(hjust = 0.5, size = 36),
    plot.caption  = element_text(
      size = 28,
      lineheight = 0.6,
      margin = margin(t = 24)),
    legend.title  = element_text(size = 56),
    legend.text   = element_text(size = 48))

# Add-on for the ranking (bar) charts: sets the y-axis text and a larger
# subtitle. Added after cnpe_theme in the plots below.
cnpe_ranking <- theme(
  plot.subtitle = element_text(size = 54),
  axis.text.y   = element_text(hjust = 1, size = 36))

#cnpe_bar <-


# Add-on for the maps: sets caption size and line height.
cnpe_map <- theme(
  plot.caption = element_text(lineheight = 0.6, size = 28))

# Plot captions ----
# Multi-line caption strings shared by the charts below. Line breaks inside
# the strings are part of the caption text.

# Business Master File data covering all tax-exempt organizations.
BMF_caption <-
"Data come from the IRS Business Master File.
 Data include all tax-exempt organizations in the Louisville MSA."

# Same wording as BMF_caption; kept under its own name for the charts that
# reference it explicitly.
BMF_caption_all_orgs <- BMF_caption

# Business Master File data restricted to public charities.
BMF_caption_pc <-
"Data come from the IRS Business Master File.
 Data include public charities in the Louisville MSA."

# Business Master File + 990 XML data, with the NTEE classification note.
# (Fixed wording: "based on to CNPE's" -> "based on CNPE's".)
BMF_xml_caption <-
"Data come from the IRS Business Master File and 990 series XML files.
 Organizations are classified at the time they receive nonprofit status.
 The IRS' 26 NTEE codes were collapsed into 19 categories based on CNPE's classifications.
Data include public charities in the Louisville MSA."

# Short form of the Business Master File + 990 XML caption.
BMF_xml_caption_short <-
"Data come from the IRS Business Master File and 990 series XML files.
Data include public charities in the Louisville MSA."

# Business Master File data with inflation-adjusted dollar amounts.
BMF_inflation_caption <-
"Data come from the IRS Business Master File.
 Dollar amounts adjusted to 2021 using the C-CPI-U.
 Data include public charities in the Louisville MSA."
# Create needed data for maps and labels
# Strip the " County..." suffix from `County` to get a bare county name, then
# attach the columns of map_msa_lou by that name.
county_df %<>%
  mutate(
    county = str_extract(County, "^.*(?= County)")) %>%
  left_join(map_msa_lou, by = "county")

# Flatten the county centroids into one numeric vector of interleaved
# coordinate pairs.
positions <- st_centroid(st_as_sf(county_df)) %>%
  pull(geometry) %>%
  unlist()

# Split the interleaved vector into its odd and even elements. A recycled
# logical mask is used instead of a hard-coded seq(1, 20, by = 2) so the split
# works for any number of counties.
# NOTE(review): the names are historical. The maps below use x = lats and
# y = lons, so `lats` presumably holds the x (longitude) values and `lons` the
# y (latitude) values. The names are kept because later code refers to them.
lats <- positions[c(TRUE, FALSE)]
lons <- positions[c(FALSE, TRUE)]

# CNPE brand palette. The colour names document each hex value, but the vector
# is stored unnamed: the charts index it by position (e.g. cnpe_colors[4]) and
# pass it whole to scale_fill_manual(values = ...). Previously a named vector
# was assigned and then immediately overwritten by this same unnamed vector.
cnpe_colors <- unname(c(
  "blue"      = "#00A3ED",
  "green"     = "#7FBE37",
  "red"       = "#EB382F",
  "yellow"    = "#FAA124",
  "purple"    = "#885A89",
  "dark_blue" = "#18206F",
  "grey"      = "#464646"))

# Three blue shades used to fill the county maps (indexed 1 to 3 below).
cnpe_shades <- c("#95d3ea", "#3fb0d9", "#1d7595")

# Working copy of the MSA county map; its FIPS column is used further down to
# pick the employment files.
county_map <- map_msa_lou

# Load ohio river based on county borders
map_county <- st_read(
  "C://Users//harri//OneDrive//GLP//glptools//data-raw//maps//county",
  quiet = TRUE)

# Reproject to EPSG:4326, build a FIPS code by pasting the state and county
# codes together, and keep just counties around Louisville.
map_county <- map_county %>%
  st_transform(4326) %>%
  transmute(
    FIPS = STATEFP %p% COUNTYFP,
    county = NAME) %>%
  filter(
    FIPS %in% c(18025, 18061, 18043, 18019, 18077,
                21163, 21093, 21111, 21185, 21223))

# Find borders for the combinations of counties that are adjacent to the Ohio.
# Each r<N> holds the getBorders() result for one pair of counties.
# There is an issue with some combinations, so their borders are snapped
# first. NOTE(review): the original note named combination #4, but the
# snapping below is applied to the pairs behind r1 and r5.

# r1: snap the pair onto county 21163 in EPSG:3857 (tolerance 10) before
# extracting the border, then return the result to EPSG:4326. The earlier
# unsnapped r1 computation was removed: its result was overwritten here before
# ever being used.
r1_1 <- filter(map_county, FIPS %in% c(18025, 21163)) %>% st_transform(3857)
r1_2 <- filter(map_county, FIPS == 21163) %>% st_transform(3857)
r1_corrected <- st_snap(r1_1, r1_2, 10)
r1 <- cartography::getBorders(r1_corrected)
st_crs(r1) <- 3857
r1 %<>% st_transform(4326)

r2 <- cartography::getBorders(filter(map_county, FIPS == 18061 | FIPS == 21163))
r3 <- cartography::getBorders(filter(map_county, FIPS == 18061 | FIPS == 21093))
r4 <- cartography::getBorders(filter(map_county, FIPS == 18061 | FIPS == 21111))

# r5: same snap-then-extract treatment as r1, snapping onto county 21111.
r5_1 <- filter(map_county, FIPS %in% c(18043, 21111)) %>% st_transform(3857)
r5_2 <- filter(map_county, FIPS == 21111) %>% st_transform(3857)
r5_corrected <- st_snap(r5_1, r5_2, 10)
r5 <- cartography::getBorders(r5_corrected)
st_crs(r5) <- 3857
r5 %<>% st_transform(4326)

r6 <- cartography::getBorders(filter(map_county, FIPS == 18019 | FIPS == 21111))
r7 <- cartography::getBorders(filter(map_county, FIPS == 18019 | FIPS == 21185))
r8 <- cartography::getBorders(filter(map_county, FIPS == 18019 | FIPS == 21223))
r9 <- cartography::getBorders(filter(map_county, FIPS == 18077 | FIPS == 21223))

# Combine the border segments and simplify them into a single river shape:
# stack r1..r9, merge them into one geometry, cast to LINESTRING, simplify,
# buffer the line by 1000 (flat caps, mitred joins), and merge once more.
ohio <- bind_rows(r1, r2, r3, r4, r5, r6, r7, r8, r9) %>%
  summarize() %>%
  st_cast("LINESTRING") %>%
  rmapshaper::ms_simplify() %>%
  st_buffer(1000, endCapStyle = "FLAT", joinStyle = "MITRE") %>%
  summarize()

#output5 <- output4 %>% smoothr::smooth()

# Positions of the "Indiana" / "Kentucky" text labels drawn on the maps.
# long/lat are kept as ordinary columns (remove = FALSE) because the plots map
# them to x/y directly; the CRS is set to EPSG:4326.
state_labels <- data.frame(
  state = c("Indiana", "Kentucky"),
  long  = c(-85.67, -85.18),
  lat   = c(38.69, 38.67)
) %>%
  sf::st_as_sf(coords = c("long", "lat"), remove = FALSE) %>%
  sf::st_set_crs(4326)

Definitions

For the purpose of this report, we’ll refer to organizations this way:

  • Nonprofit organizations are any kind of tax-exempt organization registered with the IRS. This includes all 501(c) subsections, so this list also includes unions, business and professional associations, fraternal organizations, cemeteries, and any other organizations that are able to obtain tax-exempt status. Charitable nonprofits are one type of nonprofit organization.

  • Charitable Nonprofits include all 501(c)(3) organizations. 501(c)(3)s include the following types of organizations:

    • Foundations,
    • Churches,
    • Schools,
    • Hospital and Medical Research Organizations,
    • Government Entities,
    • and Public Charities.
  • Charitable social impact organizations are 501(c)(3)s that are public charities. The distinctions between most of the types of charitable nonprofits listed above are fairly straightforward; however, the distinction between public charities and private foundations is nuanced. Roughly speaking, public charities are financially supported by many individual donors and have greater interaction with the public. Private foundations, on the other hand, are typically controlled by members of a family or by a small group of individuals, and they derive much of their support from a small number of sources and from investment income. Private foundations are subject to more reporting requirements and more financial regulations, they must pay taxes on investment income, and they have lower limits on tax benefits for their donors. As a result, public charity status is more desirable than private foundation status.

https://www.irs.gov/charities-non-profits/eo-operational-requirements-private-foundations-and-public-charities

Methodology

Definitions

  • Nonprofit - any kind of tax-exempt organizations registered with the IRS. This report includes data on the number of nonprofits of different types.
  • Charitable Nonprofit - Remove? This is just 501(c)(3)s, but we’re going to drop this term from the report.
  • Social Impact Nonprofit - Nonprofits that are 501(c)(3)s and are public charities.
  • Subsector - When an organization files to become a nonprofit, they choose the type of work they do from a list of categories. This list is slightly condensed to 19 categories for this report. Include IRS subsector to CNPE crosswalk here.
  • 990 - The form that nonprofit organizations file annually with the IRS to report on their finances. This is the equivalent of a tax return for nonprofit organizations. Explain: 990, 990PF, 990N.
  • Revenue
  • Income
  • Assets
  • MSA - based on Office of Management and Budget (OMB) Bulletin 20-01 dated March 6, 2020. It includes Clark, Floyd, Harrison, and Washington counties in Indiana and Jefferson, Bullitt, Henry, Oldham, Shelby, and Spencer counties in Kentucky.

Data Sources

Data in this report come from multiple sources:

  • The IRS Business Master File, which contains a list of all nonprofits in the country, including some basic financial information.
  • The Urban Institute National Center for Charitable Statistics Business Master File archive, which contains annual business master file downloads going back to 1995.
  • The IRS 990 database, which contains 990 filings for all tax exempt organizations.

The list of current exempt organizations in Louisville was created using the Business Master File downloaded on June 9th, 2022. Detailed financial information for those organizations was created by extracting the relevant fields from their most recent 990 tax filings.

Time period

990 data for tax exempt organizations is published several months after it is received by the IRS. As a result, at the time this report was created, the most recent data available for most organizations was based on their tax returns for tax year 2020, filed in April 2021, and made public in late 2021. COVID-related delays have also postponed data availability: the IRS is still processing paper-filed 990 series received April 2020 and later to convert to a digital format.

# Counts of Louisville-area organizations by type, used for the pie chart.
# org_type is an ordered factor whose level order is the row order below.
organization_count <- data.frame(
  org_type = c(
    "Social Impact Organizations",
    "Churches",
    "Foundations and Trusts",
    "Schools",
    "Hospitals",
    "Other 501(c)s"),
  count = c(3872, 943, 458, 80, 51, 1295))

organization_count$org_type <- factor(
  as.character(organization_count$org_type),
  levels = as.character(organization_count$org_type),
  ordered = TRUE)

# Load the per-subsector totals. col_types "cnnnn" declares one character
# column followed by four numeric columns.
# NOTE(review): later code reads CNPE, Nonprofits, Nonprofits_with_employees,
# Revenue, Employees and Payroll from `orgs` (one character + five numeric
# columns) — confirm the col_types string matches the CSV.
orgs <- read_csv("Nonprofits by CNPE Category.csv", col_types = "cnnnn")

Employment

https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=6&year=2021&qtr=A&own=5&area=21111&supp=0

Employment compared to other sectors within Jefferson County

#naics_codes <- read_csv("employment/label_industry.csv")

# Get relevant files: keep the files in "2021.annual.by_area" whose names
# contain any of the FIPS codes in county_map.
employment_files <- str_subset(
  list.files("2021.annual.by_area"),
  paste0(county_map$FIPS, collapse = "|"))

# Load county data and subset to relevant rows: for each file keep the
# "Private" ownership rows at aggregation level 74, reduce them to
# FIPS / industry / employment, and append them to employment_county.
for (f in employment_files) {

  temp <- read_csv("2021.annual.by_area/" %p% f) %>%
    filter(agglvl_code == 74, own_title == "Private") %>%
    transmute(
      FIPS = as.character(area_fips),
      industry = industry_title,
      employment = annual_avg_emplvl)

  employment_county <- assign_row_join(employment_county, temp)

}

# Clean NAICS categories: drop the literal "NAICS", the numeric code prefix
# (e.g. "NN " or "NN-NN "), and surrounding whitespace from the industry label.
employment_county <- employment_county %>%
  mutate(
    industry = industry %>%
      str_remove("NAICS") %>%
      str_remove("\\d{2}-?\\d{0,2} ") %>%
      str_trim())

# Employees of "Health Care" rows in county_df_detail, labelled with the
# matching private-industry name so they can be joined by FIPS and industry.
healthcare_emp <- county_df_detail %>%
  filter(NTEE1 == "Health Care") %>%
  transmute(FIPS, industry = "Health care and social assistance", Employees)

# One "Public Charities" pseudo-industry row per county, built from county_df.
nonprofit_emp_county <- county_df %>%
  transmute(FIPS, industry = "Public Charities", employment = Employees)

# Subtract the joined Employees from the matching industry rows (floored at
# zero; rows with no match subtract 0), then append the public-charity rows.
employment_county <- employment_county %>%
  left_join(healthcare_emp, by = c("FIPS", "industry")) %>%
  mutate(
    Employees = replace_na(Employees, 0),
    employment = employment - Employees,
    employment = if_else(employment < 0, 0, employment)) %>%
  bind_rows(nonprofit_emp_county)
#text_align <- if_else(orgs$Nonprofits < 50, -0.2, 1.2)
#nudge_align <- if_else(orgs$Nonprofits < 50, -5, 5)

# Captions for the employment charts. Fixed the "aand" typo and removed the
# run of indentation spaces that had leaked into the third caption line.
employment_caption <-
"Private industry data is from the Bureau of Labor Statistics' Quarterly Census of Employment and Wages.
Nonprofit employment is from the IRS Business Master File and 990 Series XML files.
Employment for private industries excludes employees at nonprofit organizations."

# Caption for the nonprofit-employment map. The trailing line break is kept
# from the original string.
employment_caption2 <- "Nonprofit employment is from the IRS Business Master File and 990 Series XML files.
"

# Ten largest industries by employment in FIPS 21111 (subtitled "Jefferson
# County" below), with "Public Charities" highlighted.
# Rows are sorted ascending so the largest bar ends up on top after
# coord_flip(); the factor levels fix that order on the axis.
this_df <- employment_county %>%
  filter(FIPS == "21111") %>%
  arrange(employment) %>%
  filter(industry != "All NAICS Sectors") %>%
  mutate(
    industry = if_else(industry == "Administrative and Support and Waste Management and Remediation Services",
                       "Administration & Waste Services",
                       industry),
    industry = factor(industry, levels = industry, ordered = TRUE),
    industry_fill = if_else(industry == "Public Charities", "public_charity", "other"))

# Keep the ten largest rows. tail() replaces the hard-coded rows 12:21, which
# were only the top ten when the table had exactly 21 rows.
this_df <- tail(this_df, n = 10)

ggplot(this_df, aes(x = industry, y = employment, fill = industry_fill)) +
  geom_col() +
  geom_text(aes(label = comma(employment)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  coord_flip() +

  scale_fill_manual(values = c("public_charity" = cnpe_colors[4], "other" = cnpe_colors[1]),
                    guide = "none") +
  # Generous upper expansion leaves room for the value labels past the bars.
  scale_y_continuous(expand = expansion(mult = c(0.05, 1))) +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "10 Largest Employers by Industry",
    subtitle = "Jefferson County",
    caption = employment_caption)

Employment compared to other sectors for the MSA

# Ten largest industries by employment summed over every county in
# employment_county (subtitled "Louisville MSA" below), with "Public
# Charities" highlighted.
this_df <- employment_county %>%
  group_by(industry) %>%
  summarize(employment = sum(employment, na.rm = TRUE)) %>%
  arrange(employment) %>%
  filter(industry != "All NAICS Sectors") %>%
  mutate(
    industry = if_else(industry == "Administrative and Support and Waste Management and Remediation Services",
                       "Administration & Waste Services",
                       industry),
    industry = factor(industry, levels = industry, ordered = TRUE),
    industry_fill = if_else(industry == "Public Charities", "public_charity", "other"))

# Keep the ten largest rows. tail() replaces the hard-coded rows 12:21, which
# were only the top ten when the table had exactly 21 rows.
this_df <- tail(this_df, n = 10)

ggplot(this_df, aes(x = industry, y = employment, fill = industry_fill)) +
  geom_col() +
  geom_text(aes(label = comma(employment)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  coord_flip() +

  # Generous upper expansion leaves room for the value labels past the bars.
  scale_y_continuous(expand = expansion(mult = c(0.05, 1))) +
  scale_fill_manual(values = c("public_charity" = cnpe_colors[4], "other" = cnpe_colors[1]), guide = "none") +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "10 Largest Employers by Industry",
    subtitle = "Louisville MSA",
    caption = employment_caption)

Employment compared to other sectors for the MSA counties

Map

# Rank industries by employment within each county (1 = largest) and keep the
# rank held by "Public Charities" in each FIPS.
this_df <- employment_county %>%
  filter(industry != "All NAICS Sectors") %>%
  arrange(desc(employment)) %>%
  group_by(FIPS) %>%
  mutate(industry_ranking = row_number()) %>%
  ungroup() %>%
  filter(industry == "Public Charities") %>%
  select(FIPS, industry_ranking)

# Attach the rank to county_df and build the two-line map label:
# the county name (without " County, XX") over the formatted employee count.
county_df <- county_df %>%
  left_join(this_df, by = "FIPS") %>%
  mutate(
    labels = paste0(
      str_remove(County, " County, .{2}"),
      "\n",
      comma(Employees)))
  
  
  # sprintf("%s<br/>%s",
  #         county_df$County,
  #         comma(county_df$Employees)) %>%
  #       lapply(htmltools::HTML)



# Nonprofit employment map. County fills are hand-assigned shades given in
# row order; the per-line comments name the county each entry is meant for.
ggplot(st_as_sf(county_df)) +
  geom_sf(fill = cnpe_shades[c(2,   # Clark
                               2,   # Floyd
                               2,   # Harrison
                               1,   # Washington
                               2,   # Bullitt
                               1,   # Henry
                               3,   # Jefferson
                               2,   # Oldham
                               2,   # Shelby
                               1)]  # Spencer
          ) +

  # County labels are placed at the centroid coordinates held in the
  # lats / lons vectors (mapped to x and y respectively).
  geom_text(aes(label = labels, x = lats, y = lons),
            size = 18,
            family = "poppins",
            lineheight = 0.6) +

  # River drawn on top of the counties, then the state names.
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(data = state_labels,
            aes(label = state, x = long, y = lat),
            size = 24,
            family = "poppins") +

  cnpe_theme +
  cnpe_map +

  labs(
    title = "Nonprofit Employment",
    caption = employment_caption2)

The Big Picture

Total number of orgs

# Pie chart of organizations by type: a single stacked bar (x = "") wrapped
# into polar coordinates along y, with white separators between slices.
ggplot(organization_count, aes(x = "", y = count, fill = org_type)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0, direction = -1) +
  scale_fill_manual(
    name = "Organization Type",
    values = cnpe_colors) +
  cnpe_theme +
  theme(plot.title = element_text(size = 60)) +
  labs(
    title = "Louisville Area Nonprofits by Organization Type",
    caption = BMF_caption)

Compared to peer cities

This chart shows the number of nonprofits

# Calculate number of orgs, number of CSIs, and number by CNPE group

# All organizations per MSA.
bmf_peer_nonprofits <- bmf_peer %>%
  group_by(MSA) %>%
  summarize(nonprofits = n(), .groups = "drop")

# Public charities within subsection "03", per MSA.
bmf_peer_CSI <- bmf_peer %>%
  filter(SUBSECTION == "03", org_type == "Public charity") %>%
  group_by(MSA) %>%
  summarize(CSI = n(), .groups = "drop")

# The same public charities, split by MSA and CNPE category.
bmf_peer_CNPE <- bmf_peer %>%
  filter(SUBSECTION == "03", org_type == "Public charity") %>%
  group_by(MSA, CNPE) %>%
  summarize(CSI = n(), .groups = "drop")

# Adjust for population: 2019 totals (all races, all sexes) per MSA.
pop_df <- glpdata::population_msa_1yr %>%
  filter(
    year == 2019,
    race == "total",
    sex == "total") %>%
  select(MSA, population)

# One row per MSA with both counts, the population, and per-1,000 rates.
bmf_peer_summary <- bmf_peer_nonprofits %>%
  left_join(bmf_peer_CSI, by = "MSA") %>%
  left_join(pop_df, by = "MSA") %>%
  mutate(
    nonprofits_pp = nonprofits / population * 1000,
    CSI_pp = CSI / population * 1000)

# Per-capita public charities by MSA and CNPE category.
# The MSA x CNPE grid is completed BEFORE the rate is computed, and missing
# combinations are filled with a count of zero. Previously the grid was
# completed afterwards, which left NA rates for missing combinations and made
# the peer average below NA for any category absent from at least one peer.
bmf_peer_CNPE %<>%
  complete(CNPE, MSA, fill = list(CSI = 0L)) %>%
  filter(!is.na(CNPE)) %>%
  left_join(pop_df, by = "MSA") %>%
  mutate(CSI_pp = CSI / population * 1000)

# Average per-capita rate for each category across every MSA except "31140".
bmf_peer_CNPE_avg <- bmf_peer_CNPE %>%
  filter(MSA != "31140") %>%
  group_by(CNPE) %>%
  summarize(average = mean(CSI_pp), .groups = "drop")

# Quotient for MSA "31140": its per-capita rate divided by the average of the
# other MSAs (1 = same as that average).
bmf_quotient <- bmf_peer_CNPE %>%
  filter(MSA == "31140") %>%
  left_join(bmf_peer_CNPE_avg, by = "CNPE") %>%
  mutate(quotient = CSI_pp / average)

Total Nonprofits

This chart is just meant to be an FYI and I suggest using the ones below! This is what you get when you add up the number of nonprofits in the Louisville MSA compared to our peer cities. The reason this data needs more context is that these cities vary widely in size, so it’s not an apples-to-apples comparison. (For example, St. Louis has 2.8 million residents in the MSA compared to Louisville’s 1.3 million.) The charts after this one are adjusted for population.

# Raw (not population-adjusted) nonprofit counts for Louisville and its peers.
# pull_peers() is an external helper; the code below relies on it providing a
# `city` column.
bmf_peer_summary %<>%
  pull_peers(add_info = TRUE)

# Sort ascending so the largest bar ends up on top after coord_flip(), and
# flag Louisville for highlighting.
bmf_peer_summary %<>%
  arrange(nonprofits) %>%
  mutate(city = factor(city, levels = city, ordered = TRUE),
         color = if_else(city == "Louisville",
                         "lou", "other"))

ggplot(bmf_peer_summary, aes(x = city, y = nonprofits)) +
  geom_col(aes(fill = color)) +
  scale_fill_manual(values = c("lou" = cnpe_colors[4], "other" = cnpe_colors[1]),
                    guide = "none") +
  geom_text(aes(label = comma(nonprofits)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  coord_flip() +

  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +

  cnpe_theme +
  cnpe_ranking +

  # Title fixed: this chart plots raw counts, not a per-1,000-residents rate
  # (the per-capita chart is a separate plot).
  labs(
    title = "Total Nonprofits",
    subtitle = "NOT FOR PUBLICATION--SEE EXPLANATION ABOVE",
    caption = BMF_caption_all_orgs)

Nonprofits

Here is what I would use to answer the question “Does Louisville have too many nonprofits for a city of its size?” These numbers are based on MSAs. Louisville has relatively fewer nonprofits than the average of 6.2 per 1,000.

# Nonprofits per 1,000 residents, Louisville vs. peers. Sorted ascending so
# the highest rate ends up on top after coord_flip().
bmf_peer_summary <- bmf_peer_summary %>%
  arrange(nonprofits_pp) %>%
  mutate(
    city = factor(city, levels = city, ordered = TRUE),
    color = if_else(city == "Louisville", "lou", "other"))

# Mean rate over every MSA except "31140". The value is stored in `t` and is
# not used by the chart below.
t <- bmf_peer_summary %>%
  filter(MSA != "31140") %>%
  pull(nonprofits_pp) %>%
  mean()

ggplot(bmf_peer_summary, aes(x = city, y = nonprofits_pp)) +
  geom_col(aes(fill = color)) +
  geom_text(aes(label = comma(nonprofits_pp, accuracy = 0.1)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_fill_manual(
    values = c("lou" = cnpe_colors[4], "other" = cnpe_colors[1]),
    guide = "none") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
  coord_flip() +
  cnpe_theme +
  cnpe_ranking +
  labs(
    title = "Nonprofits per 1,000 residents",
    caption = BMF_caption_all_orgs)

Public Charities

# Public charities (CSI) per 1,000 residents, Louisville vs. peers. Sorted
# ascending so the highest rate ends up on top after coord_flip().
bmf_peer_summary <- bmf_peer_summary %>%
  arrange(CSI_pp) %>%
  mutate(
    city = factor(city, levels = city, ordered = TRUE),
    color = if_else(city == "Louisville", "lou", "other"))

ggplot(bmf_peer_summary, aes(x = city, y = CSI_pp)) +
  geom_col(aes(fill = color)) +
  geom_text(aes(label = comma(CSI_pp, accuracy = 0.1)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_fill_manual(
    values = c("lou" = cnpe_colors[4], "other" = cnpe_colors[1]),
    guide = "none") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
  coord_flip() +
  cnpe_theme +
  cnpe_ranking +
  labs(
    title = "Social Impact Organizations per 1,000 residents",
    caption = BMF_caption_pc)

Representation by sector

This graph shows a concept I pulled over from labor markets. Here’s a full explanation, though I’d recommend using the one after this in the report, and you can just skip this explanation if you’d like. The location quotient of an industry shows how specialized or over-represented an industry is in an area. To give an example, due to the UPS Worldport, Louisville has a high location quotient for shipping and logistics of something like 1.36. That means Louisville has 1.36 times as many shipping and logistics jobs as the average U.S. city, or to put it another way, Louisville has 36% more shipping and logistics jobs than average.

These numbers show location quotient for the different CNPE sectors compared to our peer cities. So a value of 1 would mean Louisville has an average number of those nonprofits here, while a value of 0.9 means Louisville has 90% of the average number of those organizations here (accounting for population).

The graph shows the size of various subsectors in Louisville using this metric. The largest subsector in Louisville compared to our peer cities is Civil Rights, Social Action, and Advocacy, where Louisville has 14% more than average. Meanwhile, Louisville has 8% fewer nonprofits than the average peer city, adjusted for population. (Or you could say we have 92% as many nonprofits as the average peer city.) Finally, we are relatively weakest in “Public, Society Benefit” where we only have 74% as many nonprofits as the average peer city.

This might be useful or this might be too complicated of a concept to explain for just one graph. I can also swap out these “quotients” for percentages–see the following chart for a simpler way to represent this.

# Overall quotient: the CSI_pp of MSA "31140" divided by the mean CSI_pp of
# all other MSAs in bmf_peer_summary.
quotient_avg <- bmf_peer_summary %>%
  mutate(lou = if_else(MSA == "31140", "lou", "other")) %>%
  group_by(lou) %>%
  summarize(csi_quotient = mean(CSI_pp), .groups = "drop") %>%
  summarize(csi_quotient = csi_quotient[lou == "lou"] / csi_quotient[lou == "other"]) %>%
  pull(csi_quotient)

# Extra row so the overall figure appears as its own bar.
avg_row <- data.frame(
  CNPE = "All Public Charities",
  quotient = quotient_avg)

# Add the overall row, sort ascending (largest on top after coord_flip()),
# drop the "Unknown" category, and flag the overall row for highlighting.
bmf_quotient %<>%
  bind_rows(avg_row) %>%
  arrange(quotient) %>%
  filter(CNPE != "Unknown") %>%
  mutate(CNPE = factor(CNPE, levels = CNPE, ordered = TRUE),
         average_id = if_else(CNPE == "All Public Charities", "average", "not_average"))

ggplot(bmf_quotient, aes(x = CNPE, y = quotient)) +
  geom_col(aes(fill = average_id)) +
  geom_text(aes(label = comma(quotient, accuracy = 0.01)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_fill_manual(values = c("average" = cnpe_colors[4],
                               "not_average" = cnpe_colors[1]),
                    guide = "none") +
  coord_flip() +

  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +

  # Reference line at a quotient of 1. `linewidth` replaces the deprecated
  # `size` argument for lines, matching its use elsewhere in this file.
  geom_hline(yintercept = 1, linetype = "longdash", linewidth = 2) +

  cnpe_theme +
  cnpe_ranking +

  theme(
    plot.title = element_text(size = 54, hjust = 0.5)) +

  # Title typo fixed ("Reprsentation" -> "Representation").
  labs(
    title = "Representation of Subsectors Compared to Peers",
    subtitle = "NOT FOR PUBLICATION--see next graph",
    caption = BMF_caption)

This chart shows the number of nonprofits in Louisville compared to peer cities broken down by subsector. The orange bar represents all public charities and shows that Louisville has 7.9% fewer nonprofits than average compared to our peer cities. The other bars reflect individual subsectors: for example, Louisville has 13.6% more Civil Rights, Social Action, and Advocacy organizations than the average of our peer cities.

# Express each quotient as a difference from 1 (0 = equal to the peer
# average, negative = below it).
bmf_quotient %<>%
  mutate(quotient_pct = quotient - 1)

# Label justification per row: 1.2 for negative values and -0.2 otherwise, so
# each label sits beyond the end of its bar. Computed from bmf_quotient in its
# current row order, which is the order the plot uses.
text_align <- if_else(bmf_quotient$quotient_pct < 0, 1.2, -0.2)

ggplot(bmf_quotient, aes(x = CNPE, y = quotient_pct)) +
  geom_col(aes(fill = average_id)) +
  geom_text(aes(label = percent(quotient_pct, accuracy = 0.1),
                hjust = text_align),
            size = 16,
            family = "poppins") +
  scale_fill_manual(values = c("average" = cnpe_colors[4],
                               "not_average" = cnpe_colors[1]),
                    guide = "none") +
  coord_flip() +

  scale_y_continuous(expand = expansion(mult = c(0.2, 0.2))) +

  cnpe_theme +
  cnpe_ranking +

  # Reference line at zero. `linewidth` replaces the deprecated `size`
  # argument for lines, matching its use elsewhere in this file.
  geom_hline(yintercept = 0, linetype = "longdash", linewidth = 2) +

  theme(
    plot.title = element_text(size = 42, hjust = 0.5)) +

  labs(
    title = "Social Impact Organizations by Subsector Compared to Peers",
    caption = BMF_caption)

Public charities by subsector

# Give rows without a CNPE category an explicit label.
orgs$CNPE <- replace(orgs$CNPE, is.na(orgs$CNPE), "Uncategorized")

# Alignment helpers computed from the current row order. The plot below uses
# a fixed hjust and does not reference either vector.
text_align <- if_else(orgs$Nonprofits < 50, -0.2, 1.2)
nudge_align <- if_else(orgs$Nonprofits < 50, -5, 5)

# Largest subsector first; the reversed factor levels put it on top after
# coord_flip().
orgs <- orgs %>% arrange(desc(Nonprofits))

ggplot(orgs, aes(x = factor(CNPE, levels = rev(CNPE)), y = Nonprofits)) +
  geom_col(fill = cnpe_colors[1]) +
  geom_text(aes(label = comma(Nonprofits)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
  coord_flip() +
  cnpe_theme +
  cnpe_ranking +
  theme(plot.title = element_text(size = 56)) +
  labs(
    title = "Social Impact Organizations by Subsector",
    caption = BMF_caption)

Public charities with employees by subsector

# Give rows without a CNPE category an explicit label.
orgs$CNPE <- replace(orgs$CNPE, is.na(orgs$CNPE), "Uncategorized")

# Alignment helpers computed from the current row order. The plot below uses
# a fixed hjust and does not reference either vector.
text_align <- if_else(orgs$Nonprofits < 50, -0.2, 1.2)
nudge_align <- if_else(orgs$Nonprofits < 50, -5, 5)

# Largest subsector first; the reversed factor levels put it on top after
# coord_flip().
orgs <- orgs %>% arrange(desc(Nonprofits_with_employees))

ggplot(orgs, aes(x = factor(CNPE, levels = rev(CNPE)),
                 y = Nonprofits_with_employees)) +
  geom_col(fill = cnpe_colors[1]) +
  geom_text(aes(label = comma(Nonprofits_with_employees)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
  coord_flip() +
  cnpe_theme +
  cnpe_ranking +
  theme(plot.title = element_text(size = 44)) +
  labs(
    title = "Social Impact Organizations with Employees by Subsector",
    caption = BMF_xml_caption)

Revenue

# Alignment helper computed from the current row order. The plot below uses a
# fixed hjust and does not reference it.
text_align <- if_else(orgs$Revenue > 500000000, 1.2, -0.2)

# Largest revenue first; the reversed factor levels put it on top after
# coord_flip().
orgs %<>% arrange(desc(Revenue))

# Human-readable dollar labels, built after sorting so they line up with the
# plotted rows. Thresholds are inclusive (>=) and a final branch formats any
# remaining non-missing value as plain dollars; previously values at or below
# 1,000, or exactly on a threshold, produced NA labels.
dollar_labels <-
  case_when(
    orgs$Revenue >= 1000000000 ~ dollar(orgs$Revenue,
                                        scale = 0.000000001,
                                        suffix = " billion",
                                        accuracy = 0.01),
    orgs$Revenue >= 1000000 ~ dollar(orgs$Revenue,
                                     scale = 0.000001,
                                     suffix = " million",
                                     accuracy = 1),
    orgs$Revenue >= 1000 ~ dollar(orgs$Revenue,
                                  scale = 0.001,
                                  suffix = " thousand",
                                  accuracy = 0.01),
    !is.na(orgs$Revenue) ~ dollar(orgs$Revenue, accuracy = 1))

ggplot(orgs, aes(x = factor(CNPE, levels = rev(CNPE)), y = Revenue)) +
  geom_col(fill = cnpe_colors[1]) +
  geom_text(aes(label = dollar_labels),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organization Revenue by Subsector",
    caption = BMF_xml_caption) +

  theme(plot.title = element_text(size = 40)) +

  # Extra upper expansion leaves room for the long dollar labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.4)))

Employees

# Alignment helper computed from the current row order. The plot below uses a
# fixed hjust and does not reference it.
text_align <- if_else(orgs$Employees < 2000, -0.2, 1.2)

# Largest employer subsector first; the reversed factor levels put it on top
# after coord_flip().
orgs <- orgs %>% arrange(desc(Employees))

ggplot(orgs, aes(x = factor(CNPE, levels = rev(CNPE)), y = Employees)) +
  geom_col(fill = cnpe_colors[1]) +
  geom_text(aes(label = comma(Employees)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.25))) +
  coord_flip() +
  cnpe_theme +
  cnpe_ranking +
  theme(plot.title = element_text(size = 40)) +
  labs(
    title = "Social Impact Organization Employees by Subsector",
    caption = BMF_xml_caption)

Payroll

# Largest payroll first; the reversed factor levels put it on top after
# coord_flip().
orgs %<>% arrange(desc(Payroll))

# Human-readable dollar labels, built after sorting so they line up with the
# plotted rows. Thresholds are inclusive (>=) and a final branch formats any
# remaining non-missing value as plain dollars; previously values strictly
# between 0 and 1,000, or exactly on a threshold, produced NA labels.
dollar_labels <-
  case_when(
    orgs$Payroll >= 1000000000 ~ dollar(orgs$Payroll,
                                        scale = 0.000000001,
                                        suffix = " billion",
                                        accuracy = 0.01),
    orgs$Payroll >= 1000000 ~ dollar(orgs$Payroll,
                                     scale = 0.000001,
                                     suffix = " million",
                                     accuracy = 1),
    orgs$Payroll >= 1000 ~ dollar(orgs$Payroll,
                                  scale = 0.001,
                                  suffix = " thousand",
                                  accuracy = 1),
    orgs$Payroll == 0 ~ "$0",
    !is.na(orgs$Payroll) ~ dollar(orgs$Payroll, accuracy = 1))

ggplot(orgs, aes(x = factor(CNPE, levels = rev(CNPE)), y = Payroll)) +
  geom_col(fill = cnpe_colors[1]) +
  geom_text(aes(label = dollar_labels),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organization Payroll by Subsector",
    caption = BMF_xml_caption) +

  theme(plot.title = element_text(size = 40)) +

  # Extra upper expansion leaves room for the long dollar labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.35)))

by County

Number of organizations

Map

# Flatten the county centroids into one numeric vector of interleaved
# coordinate pairs.
positions <- st_centroid(st_as_sf(county_df)) %>%
  pull(geometry) %>%
  unlist()

# Split into odd and even elements with a recycled logical mask rather than a
# hard-coded seq(1, 20, by = 2), so the split works for any number of counties.
# NOTE(review): the plot maps x = lats and y = lons, so `lats` presumably
# holds the x (longitude) values despite its name.
lats <- positions[c(TRUE, FALSE)]
lons <- positions[c(FALSE, TRUE)]

# Two-line label: county name (without " County, XX") over the nonprofit count.
county_df %<>%
  mutate(
    labels = paste0(str_remove(County, " County, .{2}"), "\n", comma(Nonprofits)))

# County fills are hand-assigned shades given in row order; the per-line
# comments name the county each entry is meant for.
ggplot(st_as_sf(county_df)) +
  geom_sf(fill = c(cnpe_shades[2], # Clark
                   cnpe_shades[2], # Floyd
                   cnpe_shades[2], # Harrison
                   cnpe_shades[2], # Washington
                   cnpe_shades[2], # Bullitt
                   cnpe_shades[1], # Henry
                   cnpe_shades[3], # Jefferson
                   cnpe_shades[2], # Oldham
                   cnpe_shades[2], # Shelby
                   cnpe_shades[1]) # Spencer
          ) +

  cnpe_theme +
  cnpe_map +

  labs(
    title = "Louisville Region Nonprofits by County, 2022",
    caption = BMF_caption) +
  theme(plot.title = element_text(hjust = 0.5)) +
  # County labels at the centroid coordinates.
  geom_text(aes(label = labels,
                x = lats, y = lons),
            size = 18,
            family = "poppins",
            lineheight = 0.6) +
  # River drawn on top of the counties, then the state names.
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(data = state_labels,
            aes(label = state, x = long, y = lat),
            size = 24,
            family = "poppins")

# Flatten the county centroids into one numeric vector of interleaved
# coordinate pairs.
positions <- st_centroid(st_as_sf(county_df)) %>%
  pull(geometry) %>%
  unlist()

# Split into odd and even elements with a recycled logical mask rather than a
# hard-coded seq(1, 20, by = 2), so the split works for any number of counties.
lats <- positions[c(TRUE, FALSE)]
lons <- positions[c(FALSE, TRUE)]

# Two-line label: county name (without " County, XX") over the nonprofit count.
county_df %<>%
  mutate(
    labels = paste0(str_remove(County, " County, .{2}"), "\n", comma(Nonprofits)))

# Outline map of four counties (no fill) with a heavily simplified river
# (keep = 0.05) drawn on top.
ggplot(st_as_sf(filter(county_df, County %in% c("Jefferson County, KY", "Floyd County, IN", "Harrison County, IN", "Clark County, IN")))) +

  cnpe_theme +
  cnpe_map +
  geom_sf(fill = NA,
          linewidth = 1) +
  geom_sf(data = rmapshaper::ms_simplify(st_as_sf(ohio), keep = 0.05), fill = "#0E4A99")

# River only, at full detail. `color = NA` removes the polygon outline; the
# previous `stroke = NA` does not control polygon borders in geom_sf().
ggplot(st_as_sf(filter(county_df, County %in% c("Jefferson County, KY", "Floyd County, IN", "Harrison County, IN", "Clark County, IN")))) +

  cnpe_theme +
  cnpe_map +
  geom_sf(data = st_as_sf(ohio), fill = "#0E4A99", color = NA)

Graph

# Bar chart: nonprofits per county ----
# Kentucky counties first, then the rest; within each group, largest first.
this_county_df <- county_df %>%
  arrange(!str_detect(County, "KY"), desc(Nonprofits))

# Label offset per bar: counts above 6000 get hjust 1.2, all others -0.2.
# This must be computed from the *sorted* frame. ggplot matches the vector to
# rows by position, so deriving it from county_df (a different row order)
# would put the offsets on the wrong bars.
text_align <- if_else(this_county_df$Nonprofits > 6000, 1.2, -0.2)

ggplot(this_county_df,
       aes(x = factor(County, levels = rev(County), ordered = TRUE),
           y = Nonprofits)) +
  geom_bar(stat = "identity",
           fill = cnpe_colors[1]) +
  geom_text(aes(label = comma(Nonprofits),
                hjust = text_align),
            size = 16,
            family = "poppins") +
  # Horizontal bars; reversed factor levels put the first row at the top.
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Nonprofits per County",
    caption = BMF_caption) +

  theme(axis.text.y = element_text(size = 48)) +

  # Extra room past the bar ends for the value labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.18)))

Number of organizations with employees

Map

# Map: organizations with employees per county ----
# Flatten the county centroid points into one numeric vector
# (x1, y1, x2, y2, ...).
positions <- unlist(pull(st_centroid(st_as_sf(county_df)), geometry))

# Odd entries go to `lats` and even entries to `lons`. Despite the names,
# `lats` is mapped to x and `lons` to y in the label layer below.
lats <- positions[seq(1, 20, by = 2)]
lons <- positions[seq(2, 20, by = 2)]

# Two-line label: short county name, then the formatted count of
# organizations with employees.
county_df <- county_df %>%
  mutate(
    labels = paste0(
      str_remove(County, " County, .{2}"),
      "\n",
      comma(Nonprofits_with_employees)
    )
  )

ggplot(st_as_sf(county_df)) +
  # Layers, in drawing order: counties, county labels, `ohio`, state labels.
  geom_sf(
    # One shade per row of county_df. Per the original annotations the rows
    # are: Clark, Floyd, Harrison, Washington, Bullitt, Henry, Jefferson,
    # Oldham, Shelby, Spencer.
    fill = cnpe_shades[c(2, 2, 2, 2, 2, 1, 3, 2, 2, 1)]
  ) +
  geom_text(
    aes(x = lats, y = lons, label = labels),
    family = "poppins",
    size = 18,
    lineheight = 0.6
  ) +
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(
    data = state_labels,
    aes(x = long, y = lat, label = state),
    family = "poppins",
    size = 24
  ) +
  # Titles
  labs(
    title = "Social Impact Organizations with Employees per County",
    caption = BMF_xml_caption
  ) +
  # Theme (order matters: later elements override earlier ones)
  cnpe_theme +
  cnpe_map +
  theme(plot.title = element_text(size = 48))

Graph

# Bar chart: organizations with employees by county ----
# Kentucky counties first, then the rest; within each group, largest first.
this_county_df <- county_df %>%
  arrange(!str_detect(County, "KY"), desc(Nonprofits_with_employees))

# Label offset per bar: counts above 6000 get hjust 1.2, all others -0.2.
# This must be computed from the *sorted* frame. ggplot matches the vector to
# rows by position, so deriving it from county_df (a different row order)
# would put the offsets on the wrong bars.
text_align <- if_else(
  this_county_df$Nonprofits_with_employees > 6000, 1.2, -0.2
)

ggplot(this_county_df,
       aes(x = factor(County, levels = rev(County), ordered = TRUE),
           y = Nonprofits_with_employees)) +
  geom_bar(stat = "identity",
           fill = cnpe_colors[1]) +
  geom_text(aes(label = comma(Nonprofits_with_employees),
                hjust = text_align),
            size = 16,
            family = "poppins") +
  # Horizontal bars; reversed factor levels put the first row at the top.
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organizations with Employees by County",
    caption = BMF_xml_caption_short) +

  theme(
    axis.text.y = element_text(size = 48),
    plot.title = element_text(size = 48)) +

  # Extra room past the bar ends for the value labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.18)))

Number of Employees

Map

# Map: employment per county ----
# Flatten the county centroid points into one numeric vector
# (x1, y1, x2, y2, ...).
positions <- unlist(pull(st_centroid(st_as_sf(county_df)), geometry))

# Odd entries go to `lats` and even entries to `lons`. Despite the names,
# `lats` is mapped to x and `lons` to y in the label layer below.
lats <- positions[seq(1, 20, by = 2)]
lons <- positions[seq(2, 20, by = 2)]

# Two-line label: short county name, then the formatted employee count.
county_df <- county_df %>%
  mutate(
    labels = paste0(
      str_remove(County, " County, .{2}"),
      "\n",
      comma(Employees)
    )
  )

ggplot(st_as_sf(county_df)) +
  # Layers, in drawing order: counties, county labels, `ohio`, state labels.
  geom_sf(
    # One shade per row of county_df. Per the original annotations the rows
    # are: Clark, Floyd, Harrison, Washington, Bullitt, Henry, Jefferson,
    # Oldham, Shelby, Spencer.
    fill = cnpe_shades[c(2, 2, 2, 1, 2, 1, 3, 2, 2, 1)]
  ) +
  geom_text(
    aes(x = lats, y = lons, label = labels),
    family = "poppins",
    size = 18,
    lineheight = 0.6
  ) +
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(
    data = state_labels,
    aes(x = long, y = lat, label = state),
    family = "poppins",
    size = 24
  ) +
  # Titles
  labs(
    title = "Social Impact Organization Employment by County",
    caption = employment_caption2
  ) +
  # Theme (order matters: later elements override earlier ones)
  cnpe_theme +
  cnpe_map +
  theme(plot.title = element_text(size = 48))

Graph

# Bar chart: employment by county ----
# NOTE: `text_align` is assigned here but not used by this chart, which
# applies a constant hjust instead. Kept so the script-level variable is
# unchanged.
text_align <- if_else(county_df$Employees > 6000, 1.2, -0.2)

# Kentucky counties first, then the rest; within each group, largest first.
this_county_df <- county_df %>%
  arrange(!str_detect(County, "KY"), desc(Employees))

ggplot(this_county_df,
       aes(x = factor(County, levels = rev(County), ordered = TRUE),
           y = Employees)) +
  geom_bar(stat = "identity",
           fill = cnpe_colors[1]) +
  # Constant offset, so it is set as a parameter rather than inside aes().
  geom_text(aes(label = comma(Employees)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  # Horizontal bars; reversed factor levels put the first row at the top.
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organization Employment by County",
    caption = BMF_xml_caption) +

  theme(
    axis.text.y = element_text(size = 48),
    plot.title = element_text(size = 48)) +

  # Extra room past the bar ends for the value labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.30)))

Revenue

Map

# Map: revenue per county ----
# Two-line label: short county name, then revenue formatted in millions of
# dollars with one decimal place (e.g. "$1.2m").
county_df <- county_df %>%
  mutate(
    labels = paste0(
      str_remove(County, " County, .{2}"),
      "\n",
      dollar(Revenue, scale = 1e-6, suffix = "m", accuracy = 0.1)
    )
  )

# `lats` and `lons` are not recomputed here; the label layer uses the values
# already set earlier in the script.
ggplot(st_as_sf(county_df)) +
  # Layers, in drawing order: counties, county labels, `ohio`, state labels.
  geom_sf(
    # One shade per row of county_df. Per the original annotations the rows
    # are: Clark, Floyd, Harrison, Washington, Bullitt, Henry, Jefferson,
    # Oldham, Shelby, Spencer.
    fill = cnpe_shades[c(3, 2, 2, 1, 2, 1, 3, 2, 2, 1)]
  ) +
  geom_text(
    aes(x = lats, y = lons, label = labels),
    family = "poppins",
    size = 18,
    lineheight = 0.6
  ) +
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(
    data = state_labels,
    aes(x = long, y = lat, label = state),
    family = "poppins",
    size = 24
  ) +
  # Titles
  labs(
    title = "Social Impact Organization Revenue by County",
    caption = BMF_xml_caption_short
  ) +
  # Theme (order matters: later elements override earlier ones)
  cnpe_theme +
  cnpe_map +
  theme(plot.title = element_text(size = 48))

Graph

# Bar chart: revenue by county ----
# Kentucky counties first, then the rest; within each group, largest first.
this_county_df <- county_df %>%
  arrange(!str_detect(County, "KY"), desc(Revenue))

ggplot(this_county_df,
       aes(x = factor(County, levels = rev(County), ordered = TRUE),
           y = Revenue)) +
  geom_bar(stat = "identity",
           fill = cnpe_colors[1]) +
  # Value labels in millions of dollars, one decimal place. The offset is
  # constant, so it is set as a parameter rather than inside aes().
  geom_text(aes(label = dollar(Revenue, scale = 0.000001,
                               suffix = "m", accuracy = 0.1)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  # Horizontal bars; reversed factor levels put the first row at the top.
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organization Revenue by County",
    caption = BMF_xml_caption) +

  theme(
    axis.text.y = element_text(size = 48),
    plot.title = element_text(size = 48)) +

  # Extra room past the bar ends for the value labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))

Payroll

Map

# Map: payroll per county ----
# Two-line label: short county name, then payroll formatted in millions of
# dollars with one decimal place (e.g. "$1.2m").
county_df <- county_df %>%
  mutate(
    labels = paste0(
      str_remove(County, " County, .{2}"),
      "\n",
      dollar(Payroll, scale = 1e-6, suffix = "m", accuracy = 0.1)
    )
  )

# `lats` and `lons` are not recomputed here; the label layer uses the values
# already set earlier in the script.
ggplot(st_as_sf(county_df)) +
  # Layers, in drawing order: counties, county labels, `ohio`, state labels.
  geom_sf(
    # One shade per row of county_df. Per the original annotations the rows
    # are: Clark, Floyd, Harrison, Washington, Bullitt, Henry, Jefferson,
    # Oldham, Shelby, Spencer.
    fill = cnpe_shades[c(2, 2, 1, 1, 2, 1, 3, 2, 2, 1)]
  ) +
  geom_text(
    aes(x = lats, y = lons, label = labels),
    family = "poppins",
    size = 18,
    lineheight = 0.6
  ) +
  geom_sf(data = st_as_sf(ohio), fill = "#c2e0f4") +
  geom_text(
    data = state_labels,
    aes(x = long, y = lat, label = state),
    family = "poppins",
    size = 24
  ) +
  # Titles
  labs(
    title = "Social Impact Organization Payroll by County",
    caption = BMF_xml_caption
  ) +
  # Theme (order matters: later elements override earlier ones)
  cnpe_theme +
  cnpe_map +
  theme(plot.title = element_text(size = 48))

Graph

# Bar chart: payroll by county ----
# Kentucky counties first, then the rest; within each group, largest first.
this_county_df <- county_df %>%
  arrange(!str_detect(County, "KY"), desc(Payroll))

ggplot(this_county_df,
       aes(x = factor(County, levels = rev(County), ordered = TRUE),
           y = Payroll)) +
  geom_bar(stat = "identity",
           fill = cnpe_colors[1]) +
  # Value labels in millions of dollars, one decimal place. The offset is
  # constant, so it is set as a parameter rather than inside aes().
  geom_text(aes(label = dollar(Payroll, scale = 0.000001,
                               suffix = "m", accuracy = 0.1)),
            hjust = -0.2,
            size = 16,
            family = "poppins") +
  # Horizontal bars; reversed factor levels put the first row at the top.
  coord_flip() +

  cnpe_theme +
  cnpe_ranking +

  labs(
    title = "Social Impact Organization Payroll by County",
    caption = BMF_xml_caption_short) +

  theme(
    axis.text.y = element_text(size = 48),
    plot.title = element_text(size = 48)) +

  # Extra room past the bar ends for the value labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.2)))

Funding

Revenue by Source

# Stacked bars: funding source by budget size ----
# Reshape to one row per (Membership Level, funding source), give the sources
# display labels in a fixed stacking order, and drop the
# "All Organizations" rows.
temp_df <- rev_source_size %>%
  select(-revenue) %>%
  pivot_longer(contributions:CYOtherRevenueAmt) %>%
  mutate(
    name =
      factor(name,
             levels = c("CYOtherRevenueAmt",
                        "CYInvestmentIncomeAmt",
                        "GovernmentGrantsAmt",
                        "CYProgramServiceRevenueAmt",
                        "contributions"),
             labels  = c("Other",
                         "Investment Income",
                         "Government",
                         "Program Fees",
                         "Private Contributions and Grants"))) %>%
  filter(`Membership Level` != "All Organizations")

ggplot(temp_df,
       # Keep the x categories in the order they appear in the data.
       aes(x = factor(`Membership Level`,
                      levels = unique(`Membership Level`),
                      ordered = TRUE),
           y = value,
           group = name,
           fill = name)) +
  geom_bar(position = "stack", stat = "identity") +

  # labels
  labs(
    title = "Source of Funding Relative to Budget Size",
    y = "% of Total Funding",
    x = "Budget Size",
    caption = BMF_xml_caption_short) +

  # scales
  # unname() makes the colors match the factor levels by position. A *named*
  # `values` vector is matched by name, and the palette's color names
  # ("blue", "green", ...) match none of the funding-source labels, so every
  # bar would otherwise fall back to the NA color.
  scale_fill_manual(values = unname(rev(cnpe_colors[1:5])),
                    name = "Funding Source") +
  scale_y_continuous(
    labels = percent_format()) +

  # geom_label(aes(label = percent(value, accuracy = 1)),
  #            position = position_stack(vjust = 0.5),
  #            size = 12,
  #            label.size = NA
  #            ) +

  # theme
  cnpe_theme +

  guides(fill = guide_legend(nrow = 2)) +

  theme(
    axis.text.y = element_text(size = 48, hjust = 1),
    axis.text.x = element_text(size = 48, angle = 45, hjust = 1, vjust = 1),
    axis.title.x = element_text(size = 60),
    axis.title.y = element_text(size = 48, angle = 0),
    legend.position = "top",
    legend.margin = margin(2, 2, 2, 2),
    legend.title = element_blank(),
    legend.text = element_text(size = 36),
    panel.grid.major.y = element_line(color = "grey50"))

Revenue by Subsector

# , fig.height=10, 
# Stacked bars: funding source by subsector ----
# Reshape to one row per (CNPE subsector, funding source), clamp negative
# amounts to zero, convert each amount to its share of the subsector total,
# and give the sources display labels in a fixed stacking order.
temp_df <- rev_source_cnpe %>%
  pivot_longer(contributions:CYOtherRevenueAmt) %>%
  mutate(value = if_else(value < 0, 0, value)) %>%
  group_by(CNPE) %>%
  mutate(value = value / sum(value)) %>%
  ungroup() %>%
  mutate(
    name =
      factor(name,
             levels = c("CYOtherRevenueAmt",
                        "CYInvestmentIncomeAmt",
                        "GovernmentGrantsAmt",
                        "CYProgramServiceRevenueAmt",
                        "contributions"),
             labels  = c("Other",
                         "Investment Income",
                         "Government",
                         "Program Fees",
                         "Private Contributions and Grants")))


# ggplot(temp_df, aes(x = "", y = value, fill = name)) +
#   facet_wrap(~CNPE) +
#   geom_bar(stat="identity", width=1, color="white") +
#   coord_polar("y", start=0, direction = -1) +
#   
#   scale_fill_manual(name = "Funding Source",
#                     values = cnpe_colors) +
#   labs(
#     title = "Source of Funding by Subsector")  +
#   cnpe_theme +
#   theme(
#     strip.text = element_text(size = 16))


ggplot(temp_df, aes(x = factor(CNPE, levels = rev(CNPE), labels = rev(CNPE)),
                    y = value,  fill = name)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_flip() +

  # unname() makes the colors match the factor levels by position. A *named*
  # `values` vector is matched by name, and the palette's color names
  # ("blue", "green", ...) match none of the funding-source labels, so every
  # bar would otherwise fall back to the NA color.
  scale_fill_manual(name = "Funding Source",
                    values = unname(rev(cnpe_colors[1:5]))) +
  labs(
    title = "Source of Funding by Subsector",
    y = "% of Total Funding",
    caption = BMF_xml_caption_short) +

  scale_y_continuous(
    labels = percent_format(),
    expand = expansion(mult = c(0.05, 0.1))) +

  # geom_label(aes(label = percent(value, accuracy = 1)),
  #            position = position_stack(vjust = 0.5),
  #            size = 12,
  #            label.size = NA
  #            ) +

  # theme
  cnpe_theme +

  # A single legend spec: guides() replaces any guide set on the scale, so
  # `reverse = TRUE` has to be given here alongside `nrow` to take effect.
  guides(fill = guide_legend(nrow = 2, reverse = TRUE)) +

  theme(
    axis.text.y = element_text(size = 30, hjust = 1),
    axis.text.x = element_text(size = 48, angle = 45, hjust = 1, vjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.text = element_text(size = 36),
    legend.position = "top",
    legend.margin = margin(2, 2, 2, 2),
    legend.title = element_blank())

  #geom_text(aes(label = percent(value, accuracy = 1), position = position_stack(vjust=0.5)))